# This script reproduces figures 21, 22, 23 # 

rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
load("city_month.RData")

### pollution ###
# nolag main
results_nolag <- list()

formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+ log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time)"))


model_pollution_con <- lm(formula = formula,
                          data = d3)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d3$city_id)

se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)


lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_nolag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                      "upper_pollution_con_95" = upper_pollution_con_95,
                      "lower_pollution_con_90" = lower_pollution_con_90,
                      "upper_pollution_con_90" = upper_pollution_con_90,
                      "est_pollution_con" = est_pollution_con)


# lag main
formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+  log_PM25_l1 + log_PM25_l2 + 
                             log_PM25_l3 + 
                             log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time)"))


model_pollution_con <- lm(formula = formula,
                          data = d3)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d3$city_id)
se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)


lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_lag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                    "upper_pollution_con_95" = upper_pollution_con_95,
                    "lower_pollution_con_90" = lower_pollution_con_90,
                    "upper_pollution_con_90" = upper_pollution_con_90,
                    "est_pollution_con" = est_pollution_con)

results <- list(results_nolag, results_lag)

pdf("output/Figure21.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.01, .05),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Pollution Next Month")
lines(c(1,1), c(results[[1]]$lower_pollution_con_95, 
                results[[1]]$upper_pollution_con_95))
lines(c(1,1), c(results[[1]]$lower_pollution_con_90, 
                results[[1]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1, results[[1]]$est_pollution_con, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_95, 
                    results[[2]]$upper_pollution_con_95))
lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_90, 
                    results[[2]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1.1, results[[2]]$est_pollution_con, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)

abline(h = 0, lty = 3)
dev.off()

# no lag pyear
formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+ log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time) + 
                             as.factor(province_id) * as.factor(year)"))


model_pollution_con <- lm(formula = formula,
                          data = d3)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d3$city_id)
se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)


lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_nolag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                      "upper_pollution_con_95" = upper_pollution_con_95,
                      "lower_pollution_con_90" = lower_pollution_con_90,
                      "upper_pollution_con_90" = upper_pollution_con_90,
                      "est_pollution_con" = est_pollution_con)

formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+  log_PM25_l1 + log_PM25_l2 + 
                             log_PM25_l3 + 
                             log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time) + 
                             as.factor(province_id) * as.factor(year)"))


model_pollution_con <- lm(formula = formula,
                          data = d3)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d3$city_id)
se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)

lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_lag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                    "upper_pollution_con_95" = upper_pollution_con_95,
                    "lower_pollution_con_90" = lower_pollution_con_90,
                    "upper_pollution_con_90" = upper_pollution_con_90,
                    "est_pollution_con" = est_pollution_con)

results <- list(results_nolag, results_lag)

pdf("output/Figure22.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.01, .05),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Pollution Next Month")
lines(c(1,1), c(results[[1]]$lower_pollution_con_95, 
                results[[1]]$upper_pollution_con_95))
lines(c(1,1), c(results[[1]]$lower_pollution_con_90, 
                results[[1]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1, results[[1]]$est_pollution_con, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_95, 
                    results[[2]]$upper_pollution_con_95))
lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_90, 
                    results[[2]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1.1, results[[2]]$est_pollution_con, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)
abline(h = 0, lty = 3)
dev.off()

## dropping never-treated ##
tours <- tapply(d3$tour, d3$city_id, mean) 
nts <- as.numeric(names(tours[tours == 0]) )
d33 <- d3[!d3$city_id %in% nts, ]

results_nolag <- list()

formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+ log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time)"))


model_pollution_con <- lm(formula = formula,
                          data = d33)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d33$city_id)
se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)

lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_nolag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                      "upper_pollution_con_95" = upper_pollution_con_95,
                      "lower_pollution_con_90" = lower_pollution_con_90,
                      "upper_pollution_con_90" = upper_pollution_con_90,
                      "est_pollution_con" = est_pollution_con)


# lag main
formula <- as.formula(paste0(paste0("log_PM25", "_f", 1), 
                             "~ tour +" ,
                             paste0("log_PM25_w", "_f", 1),
                             "+  log_PM25_l1 + log_PM25_l2 + 
                             log_PM25_l3 + 
                             log_fia_l1 + log_fia_l1_missing + 
                             log_rdi_l1 + log_rdi_l1_missing + 
                             msec2currentsec_l1 + msec2currentsec_l1_missing + 
                             msec2currentgvn_l1 + msec2currentgvn_l1_missing + 
                             mayor2currentsec_l1 + mayor2currentsec_l1_missing + 
                             mayor2currentgvn_l1 + mayor2currentgvn_l1_missing + 
                             log_import_l1 +
                             log_export_l1 +
                             log_ypschool_l1  +
                             log_ysschool_l1   +
                             log_unemp_l1         +
                             log_wages_l1 +
                             log_total_gdp_l1 +
                             log_fdi_l1 + envi_inspection + 
                             log_gdppc_l1 + as.factor(city_id) + 
                             as.factor(time)"))


model_pollution_con <- lm(formula = formula,
                          data = d33)
vcov_pollution_con <- clubSandwich::vcovCR(model_pollution_con, type="CR1S", cluster = d33$city_id)
se_pollution_con <- coeftest(model_pollution_con, vcov_pollution_con)

lower_pollution_con_95 <- se_pollution_con["tour", "Estimate"] - qnorm(.975) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_95 <- se_pollution_con["tour", "Estimate"] + qnorm(.975) * se_pollution_con["tour", "Std. Error"]

lower_pollution_con_90 <- se_pollution_con["tour", "Estimate"] - qnorm(.95) * se_pollution_con["tour", "Std. Error"]
upper_pollution_con_90 <- se_pollution_con["tour", "Estimate"] + qnorm(.95) * se_pollution_con["tour", "Std. Error"]

est_pollution_con <- se_pollution_con["tour", "Estimate"]
results_lag <- list("lower_pollution_con_95" = lower_pollution_con_95,
                    "upper_pollution_con_95" = upper_pollution_con_95,
                    "lower_pollution_con_90" = lower_pollution_con_90,
                    "upper_pollution_con_90" = upper_pollution_con_90,
                    "est_pollution_con" = est_pollution_con)

results <- list(results_nolag, results_lag)

pdf("output/Figure23.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.01, .05),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Pollution Next Month")
lines(c(1,1), c(results[[1]]$lower_pollution_con_95, 
                results[[1]]$upper_pollution_con_95))
lines(c(1,1), c(results[[1]]$lower_pollution_con_90, 
                results[[1]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1, results[[1]]$est_pollution_con, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_95, 
                    results[[2]]$upper_pollution_con_95))
lines(c(1.1,1.1), c(results[[2]]$lower_pollution_con_90, 
                    results[[2]]$upper_pollution_con_90), 
      lwd = 5, col = "grey")
points(1.1, results[[2]]$est_pollution_con, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)

abline(h = 0, lty = 3)
dev.off()